# Objectives:
import torch
import torchvision
import torchvision.transforms as transforms
# Report whether CUDA is usable in this environment.
torch.cuda.is_available()
# torchvision datasets yield PIL images in [0, 1]; map them to tensors
# normalized to [-1, 1] via (x - 0.5) / 0.5 per channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
# CIFAR-10 train/test splits and their loaders (mini-batches of 8).
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=8, shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=8, shuffle=False, num_workers=2)
# The ten CIFAR-10 class names, index-aligned with the dataset labels.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog',
           'frog', 'horse', 'ship', 'truck')
# Visualizing some sample images
import matplotlib.pyplot as plt
import numpy as np
def imshow(img):
    """Un-normalize a CIFAR-10 image tensor and display it with matplotlib."""
    unnormalized = img * 0.5 + 0.5  # invert Normalize((0.5,...), (0.5,...))
    # Convert CHW tensor to HWC numpy array for plt.imshow.
    plt.imshow(unnormalized.numpy().transpose(1, 2, 0))
    plt.show()
# Grab one random training batch to sanity-check the input pipeline.
dataiter = iter(trainloader)
# BUG FIX: `dataiter.next()` does not exist in Python 3 / recent PyTorch
# (it raises AttributeError); use the builtin next() instead.
images, labels = next(dataiter)
# show images
imshow(torchvision.utils.make_grid(images))
# print the label of every image in the grid (the original hard-coded 4
# even though the loader batch size is 8)
print(' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))
import torch.nn as nn
import torch.nn.functional as F
# Define the convolutional network
class Net(nn.Module):
    """Small CNN for 32x32 RGB CIFAR-10 images.

    Three conv -> ReLU -> 2x2 max-pool stages, then five fully-connected
    layers that end in 10 raw class logits.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Spatial size per stage (32x32 input): conv k=2 -> 31/14/6,
        # then 2x2 pool -> 15/7/3, so the last feature map is 16x3x3.
        self.conv1 = nn.Conv2d(3, 6, 2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 10, 2)
        self.conv3 = nn.Conv2d(10, 16, 2)
        self.fc1 = nn.Linear(16 * 3 * 3, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 50)
        self.fc4 = nn.Linear(50, 25)
        self.fc5 = nn.Linear(25, 10)

    def forward(self, x):
        # Convolutional feature extractor.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
        # Flatten the 16x3x3 maps for the classifier head.
        x = x.view(-1, 16 * 3 * 3)
        # Four hidden FC layers with ReLU, then the logits (no softmax;
        # CrossEntropyLoss expects raw scores).
        for fc in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(fc(x))
        return self.fc5(x)
# Pick the compute device: first CUDA GPU when available, otherwise CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)
# Instantiate the network on the chosen device.
net = Net().to(device)
# Loss function for classification; the optimizer is built below.
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
def optimizer_func(net, optimize):
    """Build an optimizer for ``net`` by name.

    Args:
        net: model whose parameters will be optimized.
        optimize: one of ``'SGD'``, ``'Adam'`` or ``'RMSProp'``.

    Returns:
        A configured ``torch.optim`` optimizer (lr=0.0009).

    Raises:
        ValueError: if ``optimize`` is not a recognized name (the original
        silently returned None, which crashed later at ``.zero_grad()``).
    """
    if optimize == 'SGD':
        return optim.SGD(net.parameters(), lr=0.0009, momentum=0.9)
    if optimize == 'Adam':
        return optim.Adam(net.parameters(), lr=0.0009)
    if optimize == 'RMSProp':
        # BUG FIX: the class is spelled ``RMSprop``; ``optim.RMSProp``
        # raises AttributeError.
        return optim.RMSprop(net.parameters(), lr=0.0009, momentum=0.9)
    raise ValueError("optimize must be 'SGD', 'Adam' or 'RMSProp', got %r"
                     % (optimize,))
# Build the optimizer; swap 'Adam' for 'SGD' or 'RMSProp' to compare.
optimizer = optimizer_func(net, 'Adam')
# Train the small CNN for two epochs over the training set.
for epoch in range(2):
    running_loss = 0.0
    for step, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        # Standard step: clear grads, forward, backward, update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Report the average loss every 2000 mini-batches.
        running_loss += loss.item()
        if step % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, step + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')
# Persist the trained weights for later reloading.
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)
# Pull one batch from the test set for a qualitative check.
dataiter = iter(testloader)
# BUG FIX: `dataiter.next()` does not exist in Python 3 / recent PyTorch;
# use the builtin next() instead.
images, labels = next(dataiter)
images = images.to(device)
labels = labels.to(device)
# Show the whole batch (tensors must be on the CPU for matplotlib).
imshow(torchvision.utils.make_grid(images.cpu()))
# Print a label per image shown (the original hard-coded 4 of 8).
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]]
                                for j in range(len(labels))))
# Re-create the network and load the weights saved after training, then
# predict on the same batch.
net = Net().to(device)
net.load_state_dict(torch.load(PATH))
outputs = net(images)
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
                              for j in range(len(predicted))))
# Overall accuracy of the trained network on the full test split.
correct = 0
total = 0
with torch.no_grad():
    for batch in testloader:
        images, labels = batch[0].to(device), batch[1].to(device)
        # argmax over the class dimension gives the predicted label.
        predictions = net(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predictions == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
# Per-class accuracy of the trained network on the test set.
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        # Boolean vector: which samples in the batch were correct.
        # (No .squeeze() — it broke indexing for a batch of size 1.)
        c = (predicted == labels)
        # BUG FIX: the original only inspected the first 4 samples of each
        # batch even though the loader batch size is 8; count every sample.
        for i in range(labels.size(0)):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))
print(net)
# BUG FIX: `%pip install torchsummary` is an IPython magic and is a syntax
# error in a plain .py file — run it in a notebook cell before this script:
#   %pip install torchsummary
from torchsummary import summary
summary(net, (3, 32, 32))  # (channels, height, width) of the input image
from torchvision import models
import torch
# Select the compute device for the transfer-learning experiment.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# VGG16 (with batch norm) pretrained on ImageNet.
model_vgg16 = models.vgg16_bn(pretrained=True).to(device)
print(model_vgg16)
# Freeze the convolutional backbone so its pretrained weights stay fixed.
for param in model_vgg16.features.parameters():
    param.requires_grad = False
# Input feature size of the first fully-connected classifier layer.
num_ftrs = list(model_vgg16.classifier.children())[0].in_features
print(num_ftrs)
def _conv_bn_relu(cin, cout):
    """One conv(3x3, pad 1) -> batch-norm -> in-place ReLU stage."""
    return [nn.Conv2d(cin, cout, kernel_size=3, padding=1),
            nn.BatchNorm2d(cout),
            nn.ReLU(inplace=True)]

# Custom (smaller) VGG-style feature extractor: four conv/BN/ReLU stages,
# each followed by 2x2 max-pooling, then three 512-channel conv stages —
# the last conv has no BN/ReLU before the final pool.
features = []
for cin, cout in [(3, 64), (64, 128), (128, 256), (256, 512)]:
    features += _conv_bn_relu(cin, cout)
    features.append(nn.MaxPool2d(kernel_size=2, stride=2))
features += _conv_bn_relu(512, 512)
features += _conv_bn_relu(512, 512)
features.append(nn.Conv2d(512, 512, kernel_size=3, padding=1))
features.append(nn.MaxPool2d(kernel_size=2, stride=2))
# Instead of replacing all fc layers, swap only the final one so the head
# outputs one logit per CIFAR-10 class.
model_vgg16.classifier[-1] = nn.Linear(4096, len(classes)).to(device)
# Replace the pretrained feature extractor with the custom stack above.
model_vgg16.features = nn.Sequential(*features).to(device)
print(model_vgg16)
summary(model_vgg16, (3, 32, 32))  # (channels, height, width) of the input
# Loss and optimizer for fine-tuning the modified VGG16.
criterion = nn.CrossEntropyLoss()
optimizer_vgg16 = optim.Adam(model_vgg16.parameters(), lr=0.005)
# Fine-tune for four epochs over the training set.
for epoch in range(4):
    running_loss = 0.0
    for step, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        # Standard step: clear grads, forward, backward, update.
        optimizer_vgg16.zero_grad()
        loss = criterion(model_vgg16(inputs), labels)
        loss.backward()
        optimizer_vgg16.step()
        # Report the average loss every 2000 mini-batches.
        running_loss += loss.item()
        if step % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, step + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')
# Overall accuracy of the fine-tuned VGG16 on the full test split.
correct = 0
total = 0
with torch.no_grad():
    for batch in testloader:
        images, labels = batch[0].to(device), batch[1].to(device)
        # argmax over the class dimension gives the predicted label.
        predictions = model_vgg16(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predictions == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
def plot_filters_single_channel(t):
    """Plot every (out_channel, in_channel) kernel slice of conv-weight
    tensor ``t`` as its own grayscale subplot, 12 per row, standardized
    and clamped to [0, 1] for display."""
    # total subplots = out_channels * in_channels
    nplots = t.shape[0] * t.shape[1]
    ncols = 12
    nrows = 1 + nplots // ncols
    fig = plt.figure(figsize=(ncols, nrows))
    count = 0
    # loop over every kernel slice
    for i in range(t.shape[0]):
        for j in range(t.shape[1]):
            count += 1
            ax1 = fig.add_subplot(nrows, ncols, count)
            # BUG FIX: move the tensor to the CPU before .numpy() so this
            # also works for CUDA weights (consistent with
            # plot_filters_multi_channel); the original's initial whole-
            # tensor conversion was dead code and is removed.
            npimg = np.array(t[i, j].cpu().numpy(), np.float32)
            # standardize, shift by 0.5 and clamp into [0, 1]
            npimg = (npimg - np.mean(npimg)) / np.std(npimg)
            npimg = np.minimum(1, np.maximum(0, (npimg + 0.5)))
            ax1.imshow(npimg)
            ax1.set_title(str(i) + ',' + str(j))
            ax1.axis('off')
            ax1.set_xticklabels([])
            ax1.set_yticklabels([])
    plt.tight_layout()
    plt.show()
def plot_filters_multi_channel(t):
    """Plot each 3-channel kernel of conv-weight tensor ``t`` as an RGB
    image (one subplot per kernel) and save the figure to myimage.png."""
    # number of kernels (out_channels)
    num_kernels = t.shape[0]
    # fixed number of subplot columns
    num_cols = 12
    num_rows = num_kernels
    fig = plt.figure(figsize=(num_cols, num_rows))
    for i in range(t.shape[0]):
        ax1 = fig.add_subplot(num_rows, num_cols, i + 1)
        # move to CPU and convert the kernel to a numpy array
        npimg = np.array(t[i].cpu().numpy(), np.float32)
        # standardize, shift by 0.5 and clamp into [0, 1]
        npimg = (npimg - np.mean(npimg)) / np.std(npimg)
        npimg = np.minimum(1, np.maximum(0, (npimg + 0.5)))
        # CHW -> HWC for imshow
        npimg = npimg.transpose((1, 2, 0))
        ax1.imshow(npimg)
        ax1.axis('off')
        ax1.set_title(str(i))
        ax1.set_xticklabels([])
        ax1.set_yticklabels([])
    # BUG FIX: apply tight_layout BEFORE saving, so the saved image matches
    # the tightened figure shown on screen (the original saved first).
    plt.tight_layout()
    plt.savefig('myimage.png', dpi=100)
    plt.show()
def plot_weights(model, layer_num, single_channel = True, collated = False):
    """Visualize the weights of conv layer ``layer_num`` of ``model``.

    single_channel=True plots each kernel slice in grayscale; otherwise the
    kernels are rendered as RGB images (only valid for 3-input-channel
    layers). Non-conv layers are reported and skipped.
    """
    layer = model.features[layer_num]
    # Guard clause: only convolutional layers have plottable kernels.
    if not isinstance(layer, nn.Conv2d):
        print("Can only visualize layers which are convolutional")
        return
    weight_tensor = layer.weight.data
    if single_channel:
        if collated:
            # NOTE(review): plot_filters_single_channel_big is not defined
            # in this file — presumably elsewhere; confirm before using
            # collated=True.
            plot_filters_single_channel_big(weight_tensor)
        else:
            plot_filters_single_channel(weight_tensor)
    elif weight_tensor.shape[1] == 3:
        plot_filters_multi_channel(weight_tensor)
    else:
        print("Can only plot weights with three channels with single channel = False")
# Visualize the weights of the VGG model's first conv layer in RGB.
plot_weights(model_vgg16, 0, single_channel = False)
# Load a pretrained ResNet-18 and re-head it for the 10 CIFAR-10 classes.
model_resnet = models.resnet18(pretrained=True)
print(model_resnet)
model_resnet.fc = nn.Linear(512, len(classes))
model_resnet
model_weights = []   # conv-layer weight tensors, in traversal order
conv_layers_r = []   # the matching conv layers themselves
counter = 0          # number of conv layers found
# Walk the direct children of the ResNet: the stem conv sits at the top
# level, the rest live inside the Sequential residual stages. Like the
# original, this only looks one level inside each residual block (so any
# convs nested deeper, e.g. in a downsample Sequential, are not collected).
for module in model_resnet.children():
    if isinstance(module, nn.Conv2d):  # idiom: isinstance over type()==
        counter += 1
        model_weights.append(module.weight)
        conv_layers_r.append(module)
    elif isinstance(module, nn.Sequential):
        for block in module:
            for child in block.children():
                if isinstance(child, nn.Conv2d):
                    counter += 1
                    model_weights.append(child.weight)
                    conv_layers_r.append(child)
print(f"Total convolutional layers: {counter}")
# Take a look at the conv layers and the shapes of their weights.
# BUG FIX: the list built above is `conv_layers_r`; `conv_layers` is not
# defined until much later in the file, so the original raised NameError.
for weight, conv in zip(model_weights, conv_layers_r):
    print(f"CONV: {conv} ====> SHAPE: {weight.shape}")
# Visualize the first conv layer's filters: 64 kernels of 7x7, shown as an
# 8x8 grid (first input channel of each kernel, in grayscale).
plt.figure(figsize=(20, 17))
for idx, kernel in enumerate(model_weights[0]):  # `filter` shadowed a builtin
    plt.subplot(8, 8, idx + 1)
    plt.imshow(kernel[0, :, :].detach(), cmap='gray')
    plt.axis('off')
# BUG FIX: create the output directory first — savefig raises
# FileNotFoundError if /content/outputs does not exist.
import os
os.makedirs('/content/outputs', exist_ok=True)
plt.savefig('/content/outputs/filter.png')
plt.show()
print(f'CNN for resnet {counter}')
import cv2 as cv
# Load a sample image from disk and convert OpenCV's BGR order to RGB.
img = cv.imread("/content/84540021.JPG")
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
plt.imshow(img)
plt.show()
# ImageNet-style preprocessing for the feature-map visualization below:
# random 224x224 crop, tensor conversion, per-channel normalization.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
img = transform(np.array(img))
img = img.unsqueeze(0)  # add the batch dimension
print(img.size())
# Chain the image through every extracted conv layer, keeping each output.
# NOTE(review): this feeds conv outputs directly into the next conv,
# skipping the BN/ReLU/downsampling in between — presumably intentional for
# visualization; confirm the channel counts of consecutive convs line up.
results = [conv_layers_r[0](img)]
for layer in conv_layers_r[1:]:
    results.append(layer(results[-1]))
outputs = results
# Show up to 16 feature maps per layer in a 2x8 grid.
for num_layer, activation in enumerate(outputs):
    plt.figure(figsize=(50, 10))
    layer_viz = activation[0, :, :, :].data
    print("Layer ", num_layer + 1)
    for idx, fmap in enumerate(layer_viz):
        if idx == 16:
            break
        plt.subplot(2, 8, idx + 1)
        plt.imshow(fmap.cpu(), cmap='gray')
        plt.axis("off")
    plt.show()
    plt.close()
img = img.to(device)
# Collect the conv layers of the modified VGG16; they sit either directly
# under the model or inside its Sequential blocks (one level deep).
conv_layers = []
for child in model_vgg16.children():
    if type(child) == nn.Conv2d:
        conv_layers.append(child)
    elif type(child) == nn.Sequential:
        conv_layers.extend(
            layer for layer in child.children() if type(layer) == nn.Conv2d)
no_of_layers = len(conv_layers)
print(no_of_layers)
# Chain the image through the VGG conv layers, keeping each activation.
# NOTE(review): as above, BN/ReLU/pooling between convs are skipped —
# presumably intentional for visualization purposes.
results = [conv_layers[0](img)]
for layer in conv_layers[1:]:
    results.append(layer(results[-1]))
outputs = results
# Show up to 16 feature maps per layer in a 2x8 grid.
for num_layer, activation in enumerate(outputs):
    plt.figure(figsize=(50, 10))
    layer_viz = activation[0, :, :, :].data
    print("Layer ", num_layer + 1)
    for idx, fmap in enumerate(layer_viz):
        if idx == 16:
            break
        plt.subplot(2, 8, idx + 1)
        plt.imshow(fmap.cpu(), cmap='gray')
        plt.axis("off")
    plt.show()
    plt.close()